-- 查询某日活跃用户 SELECT user_id, device_type, login_time FROM user_logs WHERE dt ='2025-12-20' AND login_time >='08:00:00' ORDERBY login_time DESC LIMIT 100;
优化建议:
对 dt 和 login_time 建立分区/索引 避免无 LIMIT 的全表扫描
1.1 数据聚合类(指标计算)
1 2 3 4 5 6 7 8 9 10
-- 计算每日DAU和GMV SELECT dt, COUNT(DISTINCT user_id) AS dau, SUM(order_amount) AS gmv, AVG(order_amount) AS avg_order_value FROM dwd_orders WHERE dt BETWEEN'2025-12-01'AND'2025-12-20' GROUPBY dt ORDERBY dt;
1.2 数据清洗类(ETL处理)
用途:清洗脏数据、标准化字段、补全缺失值。 (一般在ETL阶段完成清洗,避免在查询时处理)
1 2 3 4 5 6 7 8 9 10 11
-- 清洗用户表(去空、格式标准化) INSERT OVERWRITE TABLE clean_users SELECT user_id, COALESCE(age, 0) AS age, -- 空值补0 UPPER(TRIM(region)) AS region, -- 去空格并大写 CASE WHEN device IN ('ios', 'android') THEN device ELSE'other' ENDAS device_type FROM raw_users;
1.3 数据关联类(多表JOIN)
构建宽表,关联用户、订单、商品等维度。
1 2 3 4 5 6 7 8 9 10 11
-- 用户订单宽表(关联用户属性) SELECT o.order_id, o.user_id, u.age_group, u.membership_level, o.order_amount, p.category AS product_category FROM fact_orders o LEFTJOIN dim_users u ON o.user_id = u.user_id LEFTJOIN dim_products p ON o.product_id = p.product_idWHERE o.dt ='2025-12-20';
-- 用户消费金额排名(按地区分区) SELECT user_id, region, total_spent, RANK() OVER (PARTITIONBY region ORDERBY total_spent DESC) AS rank FROM ( SELECT user_id, region, SUM(order_amount) AS total_spent FROM dwd_orders WHERE dt >='2025-12-01' GROUPBY user_id, region ) t;
-- 计算7日留存 SELECT a.user_id, a.dt AS install_date, b.dt AS active_date, DATEDIFF(b.dt, a.dt) AS retention_day FROM user_installs a LEFTJOIN user_logs b ON a.user_id = b.user_id AND b.dt BETWEEN a.dt AND DATE_ADD(a.dt, 7) WHERE a.dt ='2025-12-15';
-- 创建用户行为宽表 CREATETABLE dwd_user_behavior_wide AS SELECT f.user_id, d1.age_group, d2.device_type, f.event_type, f.event_timeFROM fact_events f JOIN dim_user d1 ON f.user_id = d1.user_id JOIN dim_device d2 ON f.device_id = d2.device_id;
-- 生成月度汇总表 INSERT OVERWRITE TABLE mau_summary SELECT DATE_FORMAT(dt, 'yyyy-MM') ASmonth, COUNT(DISTINCT user_id) AS mau, SUM(revenue) AS monthly_revenue FROM dwd_orders WHERE dt >='2025-01-01' GROUPBY DATE_FORMAT(dt, 'yyyy-MM');
优化建议:
定期调度更新汇总表 使用分区表按时间归档
1.6 调度执行类(参数化SQL)
用途:配合Airflow等工具实现自动化。
1 2 3 4 5 6 7 8
-- 参数化日报(${bizdate} 由调度系统传入) INSERTINTO daily_report SELECT '${bizdate}'AS dt, COUNT(DISTINCT user_id) AS dau, SUM(order_amount) AS gmv FROM dwd_orders WHERE dt ='${bizdate}';
-- 每日DAU和GMV(日报) SELECT dt, COUNT(DISTINCT user_id) AS dau, SUM(order_amount) AS gmv, ROUND(AVG(order_amount), 2) AS avg_order_value FROM dwd_orders WHERE dt BETWEEN'2025-12-01'AND'2025-12-20' GROUPBY dt ORDERBY dt;
-- 用户地域分布(静态快照) SELECT region, COUNT(user_id) AS user_count FROM dim_users WHERE dt ='2025-12-20' GROUPBY region;
-- 订单下降归因(按渠道+设备下钻) SELECT channel, device_type, COUNT(order_id) AS order_count, SUM(order_amount) AS gmv FROM dwd_orders WHERE dt >='2025-12-15' AND dt <='2025-12-20' GROUPBY channel, device_type ORDERBY gmv DESC;
-- 漏斗转化分析(多步骤JOIN) WITH step1 AS ( SELECTCOUNT(DISTINCT user_id) AS visit_users FROM user_events WHERE event_type ='page_view'AND dt ='2025-12-20' ), step2 AS ( SELECTCOUNT(DISTINCT user_id) AS cart_users FROM user_events WHERE event_type ='add_to_cart'AND dt ='2025-12-20' ) SELECT visit_users, cart_users, ROUND(cart_users *100.0/ visit_users, 2) AS conversion_rate FROM step1, step2;
-- 移动平均预测(7日平滑) SELECT dt, gmv, AVG(gmv) OVER (ORDERBY dt ROWSBETWEEN6 PRECEDING ANDCURRENTROW) AS ma_7d FROM daily_gmv_summary WHERE dt >='2025-11-20';
-- 用户流失预警(历史活跃度对比) SELECT user_id, last_active_date, DATEDIFF('2025-12-20', last_active_date) AS inactive_days, CASE WHEN DATEDIFF('2025-12-20', last_active_date) >7THEN'high_risk' ELSE'normal' ENDAS churn_risk FROM user_last_active;
-- A/B测试转化率对比 SELECT test_group, COUNT(DISTINCT user_id) AS users, SUM(CASEWHEN converted =1THEN1ELSE0END) AS conversions, ROUND(100.0*SUM(converted) /COUNT(DISTINCT user_id), 2) AS conversion_rate FROM ab_test_results WHERE dt ='2025-12-20' GROUPBY test_group;
-- 优惠券ROI分析 SELECT coupon_type, SUM(revenue) AS revenue, SUM(coupon_cost) AS cost, ROUND((SUM(revenue) -SUM(coupon_cost)) /SUM(coupon_cost), 2) AS roi FROM marketing_campaigns WHERE dt >='2025-12-01' GROUPBY coupon_type;
2.2 按看板类型划分
2.2.1 监控型看板(实时监控核心指标)
特点:高频刷新、简单图表(折线图、柱状图) SQL优化重点:预聚合、低延迟
1 2 3 4 5 6 7 8 9
-- 实时GMV监控(5分钟级) SELECT DATE_FORMAT(event_time, 'yyyy-MM-dd HH24:mi') AS time_window, SUM(order_amount) AS gmv FROM dwd_orders WHERE dt ='2025-12-20' AND event_time >= DATE_SUB(NOW(), INTERVAL1HOUR) GROUPBY time_window ORDERBY time_window;
优化建议:
使用增量更新(如 WHERE event_time > LAST_UPDATE_TIME) 物化视图预计算分钟级聚合
2.2.2 探索型看板(多维下钻分析)
特点:交互式分析、支持筛选/下钻/对比 SQL优化重点:宽表建模、索引优化
1 2 3 4 5 6 7 8 9 10 11 12
-- 用户行为宽表(支持多维筛选) SELECT user_id, age_group, device_type, region, COUNT(event_id) AS event_count, SUM(CASEWHEN event_type ='purchase'THEN1ELSE0END) AS purchase_count FROM dwd_user_behavior_wide WHERE dt ='2025-12-20' AND region IN ('North', 'South') -- 支持前端筛选 GROUPBY user_id, age_group, device_type, region;
-- 订单异常归因(分层下钻) WITH daily_orders AS ( SELECT dt, COUNT(order_id) AS order_count FROM dwd_orders WHERE dt BETWEEN'2025-12-15'AND'2025-12-20' GROUPBY dt ), region_breakdown AS ( SELECT region, COUNT(order_id) AS order_count FROM dwd_orders WHERE dt ='2025-12-20' GROUPBY region ) SELECT 'Overall'AS level, dt AS dimension, order_count FROM daily_orders UNIONALL SELECT 'By Region'AS level, region AS dimension, order_count FROM region_breakdown;
优化建议:
使用 UNION ALL 合并多层级结果 限制下钻深度(避免无限递归)
2.2.4 战略型看板(长期趋势与规划)
特点:宏观指标、长期趋势、对比分析 SQL优化重点:时间序列优化、同比环比
1 2 3 4 5 6 7 8 9 10 11 12
-- 年度GMV趋势(同比) SELECT year, SUM(gmv) AS annual_gmv, LAG(SUM(gmv), 1) OVER (ORDERBYyear) AS prev_year_gmv, ROUND( (SUM(gmv) -LAG(SUM(gmv), 1) OVER (ORDERBYyear)) *100.0/ LAG(SUM(gmv), 1) OVER (ORDERBYyear), 2 ) AS yoy_growth FROM monthly_gmv_summary GROUPBYyear ORDERBYyear;
优化建议:
使用 LAG/LEAD 计算同比环比 预计算年度汇总表
三、优化经验
2.1 JOIN优化
优化前:大表JOIN小表,无过滤
1 2 3
SELECT a.*, b.name FROM big_table a LEFTJOIN small_table b ON a.id = b.id; -- 可能触发笛卡尔积
优化后:
1 2 3
SELECT a.*, b.name FROM (SELECT*FROM big_table WHERE dt ='2025-12-20') a -- 先过滤 LEFTJOIN small_table b ON a.id = b.id; -- 小表广播JOIN
2.2 数据倾斜处理
1 2 3 4
-- 某用户ID数据量极大,导致Task卡死 SELECT user_id, SUM(amount) FROM orders GROUPBY user_id;
解决方案(加盐):
1 2 3 4 5
SELECT CONCAT(user_id, '_', FLOOR(RAND() *10)) AS salted_key, SUM(amount) FROM orders GROUPBY salted_key;